From fd2f96c53d93f959acd46beade710aff6c020957 Mon Sep 17 00:00:00 2001
From: =?utf8?q?=C3=98yvind=20Kol=C3=A5s?= <pippin@gimp.org>
Date: Mon, 27 Aug 2018 22:57:54 +0200
Subject: [PATCH] babl-fish-reference: do some conversions in single precision
 float

This permits these conversions to go through single precision floating point
instead of double precision when none of the involved formats have more than
32bit precision. This might be detrimental to u32 but is right for u8 and u16,
which we rely on most.

We also disable the missing fast path messages, since many references are
now, through internal shortcuts, faster than some long chains; perhaps some
of these reference fast paths should be split out as new categories of fishes.
---
 babl/babl-fish-path.c      |   2 +-
 babl/babl-fish-reference.c | 282 +++++++++++++++++++++++++++++++++++--
 babl/base/type-float.c     |  32 +++++
 babl/base/type-u16.c       |   8 +-
 babl/base/type-u8.c        |   8 +-
 5 files changed, 314 insertions(+), 18 deletions(-)

diff --git a/babl/babl-fish-path.c b/babl/babl-fish-path.c
index 2e87268..14b759f 100644
--- a/babl/babl-fish-path.c
+++ b/babl/babl-fish-path.c
@@ -627,7 +627,7 @@ babl_fish_path2 (const Babl *source,
       babl_free (babl);
       babl_mutex_unlock (babl_format_mutex);
 
-      _babl_fish_missing_fast_path_warning (source, destination);
+      //_babl_fish_missing_fast_path_warning (source, destination);
 
       return NULL;
     }
diff --git a/babl/babl-fish-reference.c b/babl/babl-fish-reference.c
index 27040ae0..12b93d0 100644
--- a/babl/babl-fish-reference.c
+++ b/babl/babl-fish-reference.c
@@ -279,6 +279,135 @@ convert_from_double (BablFormat *destination_fmt,
   babl_free (dst_img);
 }
 
+/* Unpack n pixels of source_fmt from source_buf into float_buf as floats,
+ * interleaved in the component order of source_fmt->model; float_buf must
+ * have room for n * model->components floats. */
+static void
+convert_to_float (BablFormat *source_fmt,
+                  const char *source_buf,
+                  char       *float_buf,
+                  int         n)
+{
+  int        i;
+  BablImage *src_img;
+  BablImage *dst_img;
+
+  src_img = (BablImage *) babl_image_new (
+    babl_component_from_id (BABL_GRAY_LINEAR), NULL, 1, 0, NULL);
+  dst_img = (BablImage *) babl_image_new (
+    babl_component_from_id (BABL_GRAY_LINEAR), NULL, 1, 0, NULL);
+  /* Destination plane: floats, one pixel spans model->components samples. */
+  dst_img->type[0]  = (BablType *) babl_type_from_id (BABL_FLOAT);
+  dst_img->pitch[0] =
+    (dst_img->type[0]->bits / 8) * source_fmt->model->components;
+  dst_img->stride[0] = 0;
+  /* Source plane: this type is a placeholder, replaced per component below. */
+  src_img->type[0]   = (BablType *) babl_type_from_id (BABL_FLOAT);
+  src_img->pitch[0]  = source_fmt->bytes_per_pixel;
+  src_img->stride[0] = 0;
+
+  {
+  /* i is the model component, i.e. the slot within the float pixel */
+  for (i = 0; i < source_fmt->model->components; i++)
+    {
+      int j;
+      int found = 0;
+
+      dst_img->data[0] =
+        float_buf + (dst_img->type[0]->bits / 8) * i;
+
+      src_img->data[0] = (char *)source_buf;
+
+      /* j is the component position within the source format */
+      for (j = 0; j < source_fmt->components; j++)
+        {
+          src_img->type[0] = source_fmt->type[j];
+
+          if (source_fmt->component[j] ==
+              source_fmt->model->component[i])
+            {
+              babl_conversion_process (assert_conversion_find (src_img->type[0], dst_img->type[0]),
+                                       (void*)src_img, (void*)dst_img, n);
+              found = 1;
+              break;
+            }
+          /* no match: step over this source component's bytes */
+          src_img->data[0] += src_img->type[0]->bits / 8;
+        }
+
+      if (!found)
+        {
+          char *dst_ptr = dst_img->data[0];
+          float value;
+          /* absent from the source: alpha is filled with 1.0, others 0.0 */
+          value = source_fmt->model->component[i]->instance.id == BABL_ALPHA ? 1.0 : 0.0;
+          for (j = 0; j < n; j++)
+            {
+              float *dst_component = (float *) dst_ptr;
+              *dst_component = value;
+              dst_ptr += dst_img->pitch[0];
+            }
+        }
+    }
+  }
+  babl_free (src_img);
+  babl_free (dst_img);
+}
+
+
+/* Pack n pixels of floats (interleaved in destination_fmt->model component
+ * order) from destination_float_buf into destination_buf as destination_fmt. */
+static void
+convert_from_float (BablFormat *destination_fmt,
+                     char      *destination_float_buf,
+                     char      *destination_buf,
+                     int        n)
+{
+  int        i;
+  BablImage *src_img;
+  BablImage *dst_img;
+
+  src_img = (BablImage *) babl_image_new (
+    babl_component_from_id (BABL_GRAY_LINEAR), NULL, 1, 0, NULL);
+  dst_img = (BablImage *) babl_image_new (
+    babl_component_from_id (BABL_GRAY_LINEAR), NULL, 1, 0, NULL);
+  /* Source plane: floats, one pixel spans model->components samples. */
+  src_img->type[0]   = (BablType *) babl_type_from_id (BABL_FLOAT);
+  src_img->pitch[0]  = (src_img->type[0]->bits / 8) * destination_fmt->model->components;
+  src_img->stride[0] = 0;
+  /* Destination plane: this type is a placeholder, replaced per component. */
+  dst_img->data[0]  = destination_buf;
+  dst_img->type[0]  = (BablType *) babl_type_from_id (BABL_FLOAT);
+  dst_img->pitch[0] = destination_fmt->bytes_per_pixel;
+  dst_img->stride[0] = 0;
+
+  for (i = 0; i < destination_fmt->components; i++)
+    {
+      int j;
+      /* i walks the destination components in storage order */
+      dst_img->type[0] = destination_fmt->type[i];
+      /* j looks up the same component in the model to locate its floats */
+      for (j = 0; j < destination_fmt->model->components; j++)
+        {
+          if (destination_fmt->component[i] ==
+              destination_fmt->model->component[j])
+            {
+              src_img->data[0] =
+                destination_float_buf + (src_img->type[0]->bits / 8) * j;
+              babl_conversion_process (assert_conversion_find (src_img->type[0],
+                                       dst_img->type[0]),
+                                       (void*)src_img, (void*)dst_img, n);
+              break;
+            }
+        }
+      /* step to the next component slot within the destination pixel */
+      dst_img->data[0] += dst_img->type[0]->bits / 8;
+    }
+  babl_free (src_img);
+  babl_free (dst_img);
+}
+
+
 
 static void
 ncomponent_convert_to_double (BablFormat       *source_fmt,
@@ -350,6 +479,77 @@ ncomponent_convert_from_double (BablFormat *destination_fmt,
 }
 
 
+/* Convert the n * components source_fmt->type[0] samples to packed floats. */
+static void
+ncomponent_convert_to_float (BablFormat       *source_fmt,
+                             char             *source_buf,
+                             char             *source_float_buf,
+                             int               n)
+{
+  BablImage *src_img;
+  BablImage *dst_img;
+
+  src_img = (BablImage *) babl_image_new (
+    babl_component_from_id (BABL_GRAY_LINEAR), NULL, 1, 0, NULL);
+  dst_img = (BablImage *) babl_image_new (
+    babl_component_from_id (BABL_GRAY_LINEAR), NULL, 1, 0, NULL);
+  /* Destination plane: tightly packed floats. */
+  dst_img->type[0]  = (BablType *) babl_type_from_id (BABL_FLOAT);
+  dst_img->pitch[0] = (dst_img->type[0]->bits / 8);
+  dst_img->stride[0] = 0;
+  /* Source plane: tightly packed samples of the format's first type. */
+  src_img->data[0] = source_buf;
+  src_img->type[0] = source_fmt->type[0];
+  src_img->pitch[0] = source_fmt->type[0]->bits / 8;
+  src_img->stride[0] = 0;
+  dst_img->data[0] = source_float_buf;
+
+  babl_conversion_process (
+    assert_conversion_find (src_img->type[0], dst_img->type[0]),
+    (void*)src_img, (void*)dst_img,
+    n * source_fmt->components);
+  babl_free (src_img);
+  babl_free (dst_img);
+}
+
+/* Convert n pixels of packed floats into destination_buf as one flat run of
+ * n * components samples, all stored as destination_fmt->type[0]. */
+static void
+ncomponent_convert_from_float (BablFormat *destination_fmt,
+                               char       *destination_float_buf,
+                               char       *destination_buf,
+                               int         n)
+{
+  BablImage *src_img;
+  BablImage *dst_img;
+
+  src_img = (BablImage *) babl_image_new (
+    babl_component_from_id (BABL_GRAY_LINEAR), NULL, 1, 0, NULL);
+  dst_img = (BablImage *) babl_image_new (
+    babl_component_from_id (BABL_GRAY_LINEAR), NULL, 1, 0, NULL);
+
+  /* Source plane: tightly packed floats. */
+  src_img->data[0]   = destination_float_buf;
+  src_img->type[0]   = (BablType *) babl_type_from_id (BABL_FLOAT);
+  src_img->pitch[0]  = (src_img->type[0]->bits / 8);
+  src_img->stride[0] = 0;
+
+  /* Destination plane: tightly packed samples of the format's first type. */
+  dst_img->data[0]   = destination_buf;
+  dst_img->type[0]   = destination_fmt->type[0];
+  dst_img->pitch[0]  = destination_fmt->type[0]->bits / 8;
+  dst_img->stride[0] = 0;
+
+  babl_conversion_process (
+    assert_conversion_find (src_img->type[0], dst_img->type[0]),
+    (void*)src_img, (void*)dst_img,
+    n * destination_fmt->components);
+  babl_free (src_img);
+  babl_free (dst_img);
+}
+
+
+
 static int
 process_to_n_component (const Babl  *babl,
                         const char *source,
@@ -388,12 +588,33 @@ process_to_n_component (const Babl  *babl,
   return 0;
 }
 
+/* Returns 1 when a and b list the same components in the same order, else 0. */
+static int compatible_components (const BablFormat *a,
+                                  const BablFormat *b)
+{
+  int idx = 0;
+  if (a->components != b->components)
+    return 0;
+  while (idx < a->components && a->component[idx] == b->component[idx])
+    idx++;
+  return idx >= a->components;
+}
+
 static void
 process_same_model (const Babl  *babl,
                     const char *source,
                     char       *destination,
                     long        n)
 {
+  if (BABL (babl->fish.source) == BABL (babl->fish.destination))
+  {
+    /* Same format: plain byte copy, unless in place (memcpy forbids overlap). */
+    if (source != destination)
+      memcpy (destination, source,
+              n * babl->fish.source->format.bytes_per_pixel);
+    return;
+  }
+
   void *double_buf;
 #define MAX(a, b) ((a) > (b) ? (a) : (b))
 
@@ -402,27 +623,68 @@ process_same_model (const Babl  *babl,
                                  BABL (babl->fish.source)->format.components));
 #undef MAX
 
-  if ((BABL (babl->fish.source)->format.components ==
-       BABL (babl->fish.destination)->format.components)
-      && (BABL (babl->fish.source)->format.model->components !=
-          BABL (babl->fish.source)->format.components))
+  if (compatible_components ((void*)babl->fish.source,
+                             (void*)babl->fish.destination))
     {
-      ncomponent_convert_to_double (
+      if (BABL (babl->fish.source)->format.type[0]->bits < 32 ||
+          BABL (babl->fish.destination)->format.type[0]->bits < 32)
+      {
+        ncomponent_convert_to_float (
+          (BablFormat *) BABL (babl->fish.source),
+          (char *) source,
+          double_buf,
+          n
+        );
+
+          ncomponent_convert_from_float (
+            (BablFormat *) BABL (babl->fish.destination),
+            (void*) double_buf,
+            (char *) destination,
+            n
+          );
+
+      }
+      else
+      {
+        ncomponent_convert_to_double (
+          (BablFormat *) BABL (babl->fish.source),
+          (char *) source,
+          double_buf,
+          n
+        );
+
+        ncomponent_convert_from_double (
+          (BablFormat *) BABL (babl->fish.destination),
+          double_buf,
+          (char *) destination,
+          n
+        );
+      }
+    }
+  else
+    {
+      if (BABL (babl->fish.source)->format.type[0]->bits < 32 ||
+          BABL (babl->fish.destination)->format.type[0]->bits < 32)
+      {
+
+      convert_to_float (
         (BablFormat *) BABL (babl->fish.source),
         (char *) source,
         double_buf,
         n
       );
 
-      ncomponent_convert_from_double (
+      convert_from_float (
         (BablFormat *) BABL (babl->fish.destination),
         double_buf,
         (char *) destination,
         n
       );
-    }
-  else
-    {
+
+      }
+      else
+      {
+
       convert_to_double (
         (BablFormat *) BABL (babl->fish.source),
         (char *) source,
@@ -436,6 +698,8 @@ process_same_model (const Babl  *babl,
         (char *) destination,
         n
       );
+
+      }
     }
   babl_free (double_buf);
 }
diff --git a/babl/base/type-float.c b/babl/base/type-float.c
index 750d836..4a09f3e 100644
--- a/babl/base/type-float.c
+++ b/babl/base/type-float.c
@@ -57,6 +57,31 @@ convert_float_double (BablConversion *conversion,
     }
 }
 
+/* Copy n floats from src to dst; pitches are byte strides between samples.
+ * Returns the number of samples requested (n). */
+static long
+convert_float_float (const Babl *babl,
+                     char       *src,
+                     char       *dst,
+                     int         src_pitch,
+                     int         dst_pitch,
+                     long        n)
+{
+  long i;
+  /* Tightly packed planes: one bulk copy of n * sizeof (float) bytes. */
+  if (n > 0 && src_pitch == (int) sizeof (float) &&
+      dst_pitch == (int) sizeof (float))
+    {
+      memcpy (dst, src, (size_t) n * sizeof (float));
+      return n;
+    }
+  /* Strided (or empty) planes: copy one sample at a time. */
+  for (i = 0; i < n; i++)
+    *(float *) (dst + i * dst_pitch) = *(float *) (src + i * src_pitch);
+  return n;
+}
+
+
 void
 babl_base_type_float (void)
 {
@@ -79,4 +104,11 @@ babl_base_type_float (void)
     "plane", convert_double_float,
     NULL
   );
+
+  babl_conversion_new (
+    babl_type_from_id (BABL_FLOAT),
+    babl_type_from_id (BABL_FLOAT),
+    "plane", convert_float_float,
+    NULL
+  );
 }
diff --git a/babl/base/type-u16.c b/babl/base/type-u16.c
index 68a99bc..6841c9b 100644
--- a/babl/base/type-u16.c
+++ b/babl/base/type-u16.c
@@ -112,8 +112,8 @@ MAKE_CONVERSIONS (u16, 0.0, 1.0, 0, UINT16_MAX)
 
 static inline void
 convert_float_u16_scaled (BablConversion *conversion,
-                          float    min_val,
-                          float    max_val,
+                          double    min_val,
+                          double    max_val,
                           uint16_t min,
                           uint16_t max,
                           char    *src,
@@ -142,8 +142,8 @@ convert_float_u16_scaled (BablConversion *conversion,
 
 static inline void
 convert_u16_float_scaled (BablConversion *conversion,
-                          float   min_val,
-                          float   max_val,
+                          double   min_val,
+                          double   max_val,
                           uint16_t min,
                           uint16_t max,
                           char    *src,
diff --git a/babl/base/type-u8.c b/babl/base/type-u8.c
index 1cf0629..2ad561b 100644
--- a/babl/base/type-u8.c
+++ b/babl/base/type-u8.c
@@ -115,8 +115,8 @@ MAKE_CONVERSIONS (u8_chroma, -0.5, 0.5, 16, 240)
 
 static inline void
 convert_float_u8_scaled (BablConversion *c,
-                         float        min_val,
-                         float        max_val,
+                         double        min_val,
+                         double        max_val,
                          unsigned char min,
                          unsigned char max,
                          char         *src,
@@ -145,8 +145,8 @@ convert_float_u8_scaled (BablConversion *c,
 
 static inline void
 convert_u8_float_scaled (BablConversion *c,
-                          float        min_val,
-                          float        max_val,
+                          double        min_val,
+                          double        max_val,
                           unsigned char min,
                           unsigned char max,
                           char         *src,
-- 
2.30.2